Reproducible workflow to assess differentially abundant ASVs and taxa between temperature treatments using Indicator Species Analysis and linear discriminant analysis (LDA) effect size (LEfSe).
knitr::opts_chunk$set(echo = TRUE, eval = FALSE)
set.seed(119)
#library(conflicted)
#pacman::p_depends(microbiomeMarker, local = TRUE)
#pacman::p_depends_reverse(microbiomeMarker, local = TRUE)
library(phyloseq); packageVersion("phyloseq")
library(Biostrings); packageVersion("Biostrings")
pacman::p_load(tidyverse, metacoder, patchwork,
agricolae, labdsv, naniar, codefolder, ape,
pairwiseAdonis, microbiome, seqRFLP, DT, microbiomeMarker,
microbiomeMarker, reactable, downloadthis, captioner,
install = FALSE, update = FALSE)
options(scipen=999)
knitr::opts_current$get(c(
"cache",
"cache.path",
"cache.rebuild",
"dependson",
"autodep"
))### COmmon formatting scripts
### NOTE: captioner.R must be read BEFORE captions_XYZ.R
source("assets/captioner/captioner.R")
source("assets/captioner/captions/captions_da.R")
source("assets/reactable/download_this_fun.R")
source("assets/reactable/styles.R")
source("assets/reactable/table_functions/da.R")This workflow contains ASV differential abundance analysis for the high temperature data sets. In order to run the workflow, you …
In this workflow, we use the labdsv package (Roberts and Roberts 2016) to run Dufrene-Legendre Indicator Species Analysis and the microbiomeMarker package (Cao 2020) to run linear discriminant analysis (LDA) effect size (LEfSe) (Segata et al. 2011).
Indicator Analysis calculates the indicator value (fidelity and relative abundance) of species in clusters or types.
indval_pval <- 0.05for (i in samp_ps) {
tmp_get <- get(i)
tmp_df <- data.frame(otu_table(tmp_get))
tmp_df <- tmp_df[, which(colSums(tmp_df) != 0)]
tmp_row_names <- row.names(tmp_df)
tmp_row_names <- tmp_row_names %>%
stringr::str_replace("[A-Z]{2}_S[0-9]{2}_T[0-9]{3}_", "") %>%
stringr::str_replace("_[A-Z]{3}_", "_")
tmp_df <- tmp_df %>% tibble::add_column(tmp_row_names, .before = 1)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_seq_tab"))
assign(tmp_name, tmp_df)
rm(list = ls(pattern = "tmp_"))
}set.seed(1191)
for (i in samp_ps) {
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
tmp_iva <- indval(tmp_get[,-1], tmp_get[,1])
tmp_gr <- tmp_iva$maxcls[tmp_iva$pval <= indval_pval]
tmp_iv <- tmp_iva$indcls[tmp_iva$pval <= indval_pval]
tmp_pv <- tmp_iva$pval[tmp_iva$pval <= indval_pval]
tmp_fr <- apply(tmp_get[,-1] > 0, 2, sum)[tmp_iva$pval <= indval_pval]
tmp_sum <- data.frame(group = tmp_gr, indval = tmp_iv,
pval = tmp_pv, freq = tmp_fr)
tmp_sum <- tmp_sum[order(tmp_sum$group, -tmp_sum$indval),]
tmp_tax_df <- data.frame(tax_table(get(i)))
tmp_tax_df$ASV_ID <- NULL
tmp_sum_tax <- merge(tmp_sum, tmp_tax_df, by = "row.names", all = TRUE)
tmp_sum_tax <- tmp_sum_tax[!(is.na(tmp_sum_tax$group)),]
class(tmp_sum_tax$group) <- "character"
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^1$", paste(tmp_year, "CTL", sep = "_"))
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^2$", paste(tmp_year, "N", sep = "_"))
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^3$", paste(tmp_year, "NP", sep = "_"))
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^4$", paste(tmp_year, "P", sep = "_"))
tmp_sum_tax <- tmp_sum_tax %>% dplyr::rename("ASV_ID" = "Row.names")
tmp_sum_tax <- tmp_sum_tax[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum_tax$ASV_ID))),]
tmp_sum_tax$ASV_ID <- as.character(tmp_sum_tax$ASV_ID)
tmp_sum.prob.corrected <- p.adjust(tmp_sum$pval, "bonferroni")
tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_indval_summary"))
assign(tmp_res_name, tmp_sum_tax)
rm(list = ls(pattern = "tmp_"))
}
for (i in objects(pattern = paste("ssu_ps.*_indval_summary"))){
cat(i, nrow(get(i)), "\n")
}Now we can save a few files and display the data.
for (i in samp_ps) {
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_ctl <- paste(tmp_year, "CTL", sep = "_")
tmp_n <- paste(tmp_year, "N", sep = "_")
tmp_p <- paste(tmp_year, "P", sep = "_")
tmp_np <- paste(tmp_year, "NP", sep = "_")
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
tmp_get[,1] <- NULL
tmp_df <- as.data.frame(t(tmp_get))
tmp_col_names <- colnames(tmp_df)
tmp_col_names <- tmp_col_names %>%
stringr::str_replace("[A-Z]{2}_S[0-9]{2}_T[0-9]{3}_", "") %>%
stringr::str_replace("_[A-Z]{3}_", "_")
colnames(tmp_df) <- tmp_col_names
#tmp_df$freq_all <- apply(tmp_df > 0, 1, sum)
tmp_df$freq_CTL <- apply(tmp_df[ , (names(tmp_df) %in% tmp_ctl)] > 0, 1, sum)
tmp_df$freq_N <- apply(tmp_df[ , (names(tmp_df) %in% tmp_n)] > 0, 1, sum)
tmp_df$freq_P <- apply(tmp_df[ , (names(tmp_df) %in% tmp_p)] > 0, 1, sum)
tmp_df$freq_NP <- apply(tmp_df[ , (names(tmp_df) %in% tmp_np)] > 0, 1, sum)
tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in%
c(tmp_ctl, tmp_n, tmp_p, tmp_np))])
tmp_df$reads_CTL <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_ctl)])
tmp_df$reads_N <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_n)])
tmp_df$reads_P <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_p)])
tmp_df$reads_NP <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_np)])
tmp_df <- tmp_df[,!grepl("^[Y]", names(tmp_df))]
tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")
tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
tmp_merge_df <- merge(tmp_df, tmp_get_indval, by = "ASV_ID", all = FALSE)
tmp_merge_df <- tmp_merge_df[,c(1,11:14,2:10,15:21)]
tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_indval_final"))
assign(tmp_merge_name, tmp_merge_df)
rm(list = ls(pattern = "tmp_"))
}for (i in samp_ps) {
tmp_get <- get(i)
tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
tmp_list <- tmp_tab[,1]
tmp_ps <- prune_taxa(tmp_list, tmp_get)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_ind"))
assign(tmp_name, tmp_ps)
tmp_ps@phy_tree <- NULL
tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE,
tip.label = taxa_names(tmp_ps))
tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree)
assign(tmp_name, tmp_ps)
rm(list = ls(pattern = "tmp_"))
}(16S rRNA) Table 1 | Summary results of Indicator Analysis for each data set.
(16S rRNA) Table 2 | (Year 0) Significant results of Indicator Analysis for the FULL data set.
(16S rRNA) Table 3 | (Year 1) Significant results of Indicator Analysis for the FULL data set.
(16S rRNA) Table 4 | (Year 4) Significant results of Indicator Analysis for the FULL data set.
(16S rRNA) Table 5 | (Year 0) Significant results of Indicator Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 6 | (Year 1) Significant results of Indicator Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 7 | (Year 4) Significant results of Indicator Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 8 | (Year 0) Significant results of Indicator Analysis for the PERfect filtered data set.
(16S rRNA) Table 9 | (Year 1) Significant results of Indicator Analysis for the PERfect filtered data set.
(16S rRNA) Table 10 | (Year 4) Significant results of Indicator Analysis for the PERfect filtered data set.
(16S rRNA) Table 11 | (Year 0) Significant results of Indicator Analysis for the PIME filtered data set.
(16S rRNA) Table 12 | (Year 1) Significant results of Indicator Analysis for the PIME filtered data set.
(16S rRNA) Table 13 | (Year 4) Significant results of Indicator Analysis for the PIME filtered data set.
(16S rRNA) Table 14 | (FULL data set) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(16S rRNA) Table 15 | (Arbitrary filtered) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(16S rRNA) Table 16 | (PERFect filtered) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(16S rRNA) Table 17 | (PIME filtered) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
Linear discriminant analysis (LDA) effect size (LEfSe) is a method to support high-dimensional class comparisons with a particular focus on microbial community analyses. LEfSe determines the features (organisms, clades, operational taxonomic units, genes, or functions) most likely to explain differences between classes by coupling standard tests for statistical significance with additional tests encoding biological consistency and effect relevance. Class comparison methods typically predict biomarkers consisting of features that violate a null hypothesis of no difference between classes. The effect size provides an estimation of the magnitude of the observed phenomenon due to each characterizing feature and it is thus a valuable tool for ranking the relevance of different biological aspects and for addressing further investigations and analyses.
First, a little data tidying and subsetting only ASV data from the taxonomy table.
for (i in samp_ps) {
tmp_get <- get(i)
tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)]
tax_table(tmp_get) <- cbind(tax_table(tmp_get),
rownames(tax_table(tmp_get)))
colnames(tax_table(tmp_get)) <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "Species")
### ASV Only
tmp_asv <- tmp_get
tax_table(tmp_asv) <- tax_table(tmp_asv)[,c(7)]
tmp_name_asv <- purrr::map_chr(i, ~ paste0(., "_rf_asv"))
assign(tmp_name_asv, tmp_asv)
### NO ASVs
tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)]
tmp_name <- purrr::map_chr(i, ~ paste0(., "_rf_all"))
assign(tmp_name, tmp_get)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_rf")lefse_lda <- 2
lefse_kw <- 0.05
lefse_wc <- 0.05for (i in samp_ps) {
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_asv")))
tmp_lefse <- run_lefse(ps = tmp_get, norm = "CPM", group = "TREAT", strict = "2",
lda_cutoff = lefse_lda,
kw_cutoff = lefse_kw,
wilcoxon_cutoff = lefse_wc,
bootstrap_n = 30, bootstrap_fraction = 2/3,
sample_min = 10, multigrp_strat = FALSE, curv = FALSE)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv"))
assign(tmp_name, tmp_lefse)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv")
for (i in samp_ps) {
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_all")))
tmp_lefse <- run_lefse(ps = tmp_get, norm = "CPM", group = "TREAT", strict = "2",
lda_cutoff = lefse_lda,
kw_cutoff = lefse_kw,
wilcoxon_cutoff = lefse_wc,
bootstrap_n = 30, bootstrap_fraction = 2/3,
sample_min = 10, multigrp_strat = FALSE, curv = FALSE)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all"))
assign(tmp_name, tmp_lefse)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv$")
objects(pattern = "_lefse_all$")for (i in samp_ps) {
tmp_o_tax <- data.frame(tax_table(get(i)))
tmp_o_tax$ASV_ID <- NULL
tmp_o_tax <- tmp_o_tax %>% tibble::rownames_to_column("ASV_ID")
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv")))
tmp_mt <- data.frame(marker_table(tmp_get))
tmp_mt$feature <- gsub('s__', '', tmp_mt$feature)
tmp_mt <- tmp_mt %>% dplyr::rename(c("ASV_ID" = 1, "group" = 2, "pval" = 4)) %>%
tibble::remove_rownames()
tmp_sum <- dplyr::left_join(tmp_mt, tmp_o_tax, by = "ASV_ID")
tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^", paste(tmp_year, "_", sep = ""))
#tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^3$", "W3")
tmp_sum <- tmp_sum[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum$ASV_ID))),]
tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_summary"))
assign(tmp_res_name, tmp_sum)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv_summary")
ssu_ps_filt_Y0_lefse_asv_summaryNow we can save a few files and display the data.
for (i in samp_ps) {
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_ctl <- paste(tmp_year, "CTL", sep = "_")
tmp_n <- paste(tmp_year, "N", sep = "_")
tmp_p <- paste(tmp_year, "P", sep = "_")
tmp_np <- paste(tmp_year, "NP", sep = "_")
tmp_get <- get(i)
tmp_df <- data.frame(t(otu_table(tmp_get)))
#tmp_get[,1] <- NULL
#tmp_df <- as.data.frame(t(tmp_otu))
tmp_col_names <- colnames(tmp_df)
tmp_col_names <- tmp_col_names %>%
stringr::str_replace("[A-Z]{2}_S[0-9]{2}_T[0-9]{3}_", "") %>%
stringr::str_replace("_[A-Z]{3}_", "_")
colnames(tmp_df) <- tmp_col_names
tmp_df$freq <- apply(tmp_df > 0, 1, sum)
tmp_df$freq_CTL <- apply(tmp_df[ , (names(tmp_df) %in% tmp_ctl)] > 0, 1, sum)
tmp_df$freq_N <- apply(tmp_df[ , (names(tmp_df) %in% tmp_n)] > 0, 1, sum)
tmp_df$freq_P <- apply(tmp_df[ , (names(tmp_df) %in% tmp_p)] > 0, 1, sum)
tmp_df$freq_NP <- apply(tmp_df[ , (names(tmp_df) %in% tmp_np)] > 0, 1, sum)
tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in%
c(tmp_ctl, tmp_n, tmp_p, tmp_np))])
tmp_df$reads_CTL <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_ctl)])
tmp_df$reads_N <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_n)])
tmp_df$reads_P <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_p)])
tmp_df$reads_NP <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_np)])
tmp_df <- tmp_df[,!grepl("^[Y]", names(tmp_df))]
tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")
tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv_summary")))
tmp_merge_df <- merge(tmp_df, tmp_get_lefse, by = "ASV_ID", all = FALSE)
tmp_merge_df$padj <- NULL # remove since value unchanged
tmp_merge_df <- tmp_merge_df[,c(1,12:14,2:11,15:21)]
tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_final"))
assign(tmp_merge_name, tmp_merge_df)
rm(list = ls(pattern = "_lefse_final"))
}
objects(pattern = "_lefse_asv_final")
ssu_ps_filt_Y0_lefse_asv_finalSave a new phyloseq object
for (i in samp_ps) {
tmp_get <- get(i)
tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv_summary")))
tmp_list <- tmp_tab[,1]
tmp_ps <- prune_taxa(tmp_list, tmp_get)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_ps"))
assign(tmp_name, tmp_ps)
tmp_ps@phy_tree <- NULL
tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
tmp_ps_tree <- rtree(ntaxa(tmp_ps), rooted = TRUE,
tip.label = taxa_names(tmp_ps))
tmp_ps <- merge_phyloseq(tmp_ps, sample_data, tmp_ps_tree)
assign(tmp_name, tmp_ps)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_lefse_asv_ps$")(16S rRNA) Table 18 | Summary results of LEfSe Analysis for each data set.
(16S rRNA) Table 19 | (Year 0) Significant results of the LEfSe ASV Analysis for the FULL data set.
(16S rRNA) Table 20 | (Year 1) Significant results of the LEfSe ASV Analysis for the FULL data set.
(16S rRNA) Table 21 | (Year 4) Significant results of the LEfSe ASV Analysis for the FULL data set.
(16S rRNA) Table 22 | (Year 0) Significant results of the LEfSe ASV Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 23 | (Year 1) Significant results of the LEfSe ASV Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 24 | (Year 4) Significant results of the LEfSe ASV Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 25 | (Year 0) Significant results of the LEfSe ASV Analysis for the PERfect filtered data set.
(16S rRNA) Table 26 | (Year 1) Significant results of the LEfSe ASV Analysis for the PERfect filtered data set.
(16S rRNA) Table 27 | (Year 4) Significant results of the LEfSe ASV Analysis for the PERfect filtered data set.
(16S rRNA) Table 28 | (Year 0) Significant results of the LEfSe ASV Analysis for the PIME filtered data set.
(16S rRNA) Table 29 | (Year 1) Significant results of the LEfSe ASV Analysis for the PIME filtered data set.
(16S rRNA) Table 30 | (Year 4) Significant results of the LEfSe ASV Analysis for the PIME filtered data set.
(16S rRNA) Table 31 | (FULL data set) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(16S rRNA) Table 32 | (Arbitrary filtered) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(16S rRNA) Table 33 | (PERFect filtered) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(16S rRNA) Table 34 | (PIME filtered) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(16S rRNA) Table 35 | (Year 0) Significant results of the LEfSe Marker Analysis for the FULL data set.
(16S rRNA) Table 36 | (Year 1) Significant results of the LEfSe Marker Analysis for the FULL data set.
(16S rRNA) Table 37 | (Year 4) Significant results of the LEfSe Marker Analysis for the FULL data set.
(16S rRNA) Table 38 | (Year 0) Significant results of the LEfSe Marker Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 39 | (Year 1) Significant results of the LEfSe Marker Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 40 | (Year 4) Significant results of the LEfSe Marker Analysis for the Arbitrary filtered ASV data set.
(16S rRNA) Table 41 | (Year 0) Significant results of the LEfSe Marker Analysis for the PERfect filtered data set.
(16S rRNA) Table 42 | (Year 1) Significant results of the LEfSe Marker Analysis for the PERfect filtered data set.
(16S rRNA) Table 43 | (Year 4) Significant results of the LEfSe Marker Analysis for the PERfect filtered data set.
(16S rRNA) Table 44 | (Year 0) Significant results of the LEfSe Marker Analysis for the PIME filtered data set.
(16S rRNA) Table 45 | (Year 1) Significant results of the LEfSe Marker Analysis for the PIME filtered data set.
(16S rRNA) Table 46 | (Year 4) Significant results of the LEfSe Marker Analysis for the PIME filtered data set.
Indicator Analysis calculates the indicator value (fidelity and relative abundance) of species in clusters or types.
indval_pval <- 0.05for (i in samp_ps) {
tmp_get <- get(i)
tmp_df <- data.frame(otu_table(tmp_get))
tmp_df <- tmp_df[, which(colSums(tmp_df) != 0)]
tmp_row_names <- row.names(tmp_df)
tmp_row_names <- tmp_row_names %>%
stringr::str_replace("[A-Z]{2}_S[0-9]{2}_T[0-9]{3}_", "") %>%
stringr::str_replace("_[A-Z]{3}_", "_")
tmp_df <- tmp_df %>% tibble::add_column(tmp_row_names, .before = 1)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_seq_tab"))
assign(tmp_name, tmp_df)
rm(list = ls(pattern = "tmp_"))
}set.seed(1191)
for (i in samp_ps) {
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
tmp_iva <- indval(tmp_get[,-1], tmp_get[,1])
tmp_gr <- tmp_iva$maxcls[tmp_iva$pval <= indval_pval]
tmp_iv <- tmp_iva$indcls[tmp_iva$pval <= indval_pval]
tmp_pv <- tmp_iva$pval[tmp_iva$pval <= indval_pval]
tmp_fr <- apply(tmp_get[,-1] > 0, 2, sum)[tmp_iva$pval <= indval_pval]
tmp_sum <- data.frame(group = tmp_gr, indval = tmp_iv,
pval = tmp_pv, freq = tmp_fr)
tmp_sum <- tmp_sum[order(tmp_sum$group, -tmp_sum$indval),]
tmp_tax_df <- data.frame(tax_table(get(i)))
tmp_tax_df$ASV_ID <- NULL
tmp_sum_tax <- merge(tmp_sum, tmp_tax_df, by = "row.names", all = TRUE)
tmp_sum_tax <- tmp_sum_tax[!(is.na(tmp_sum_tax$group)),]
class(tmp_sum_tax$group) <- "character"
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^1$", paste(tmp_year, "CTL", sep = "_"))
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^2$", paste(tmp_year, "N", sep = "_"))
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^3$", paste(tmp_year, "NP", sep = "_"))
tmp_sum_tax$group <- stringr::str_replace(tmp_sum_tax$group, "^4$", paste(tmp_year, "P", sep = "_"))
tmp_sum_tax <- tmp_sum_tax %>% dplyr::rename("ASV_ID" = "Row.names")
tmp_sum_tax <- tmp_sum_tax[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum_tax$ASV_ID))),]
tmp_sum_tax$ASV_ID <- as.character(tmp_sum_tax$ASV_ID)
tmp_sum.prob.corrected <- p.adjust(tmp_sum$pval, "bonferroni")
tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_indval_summary"))
assign(tmp_res_name, tmp_sum_tax)
rm(list = ls(pattern = "tmp_"))
}
for (i in objects(pattern = paste("its_ps.*_indval_summary"))){
cat(i, nrow(get(i)), "\n")
}Now we can save a few files and display the data.
for (i in samp_ps) {
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_ctl <- paste(tmp_year, "CTL", sep = "_")
tmp_n <- paste(tmp_year, "N", sep = "_")
tmp_p <- paste(tmp_year, "P", sep = "_")
tmp_np <- paste(tmp_year, "NP", sep = "_")
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_seq_tab")))
tmp_get[,1] <- NULL
tmp_df <- as.data.frame(t(tmp_get))
tmp_col_names <- colnames(tmp_df)
tmp_col_names <- tmp_col_names %>%
stringr::str_replace("[A-Z]{2}_S[0-9]{2}_T[0-9]{3}_", "") %>%
stringr::str_replace("_[A-Z]{3}_", "_")
colnames(tmp_df) <- tmp_col_names
#tmp_df$freq_all <- apply(tmp_df > 0, 1, sum)
tmp_df$freq_CTL <- apply(tmp_df[ , (names(tmp_df) %in% tmp_ctl)] > 0, 1, sum)
tmp_df$freq_N <- apply(tmp_df[ , (names(tmp_df) %in% tmp_n)] > 0, 1, sum)
tmp_df$freq_P <- apply(tmp_df[ , (names(tmp_df) %in% tmp_p)] > 0, 1, sum)
tmp_df$freq_NP <- apply(tmp_df[ , (names(tmp_df) %in% tmp_np)] > 0, 1, sum)
tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in%
c(tmp_ctl, tmp_n, tmp_p, tmp_np))])
tmp_df$reads_CTL <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_ctl)])
tmp_df$reads_N <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_n)])
tmp_df$reads_P <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_p)])
tmp_df$reads_NP <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_np)])
tmp_df <- tmp_df[,!grepl("^[Y]", names(tmp_df))]
tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")
tmp_get_indval <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
tmp_merge_df <- merge(tmp_df, tmp_get_indval, by = "ASV_ID", all = FALSE)
tmp_merge_df <- tmp_merge_df[,c(1,11:14,2:10,15:21)]
tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_indval_final"))
assign(tmp_merge_name, tmp_merge_df)
rm(list = ls(pattern = "tmp_"))
}for (i in samp_ps) {
tmp_get <- get(i)
tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_indval_summary")))
tmp_list <- tmp_tab[,1]
tmp_ps <- prune_taxa(tmp_list, tmp_get)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_ind"))
assign(tmp_name, tmp_ps)
tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
tmp_ps <- merge_phyloseq(tmp_ps, sample_data)
assign(tmp_name, tmp_ps)
rm(list = ls(pattern = "tmp_"))
}(ITS) Table 1 | Summary results of Indicator Analysis for each data set.
(ITS) Table 5 | (Year 0) Significant results of Indicator Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 6 | (Year 1) Significant results of Indicator Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 7 | (Year 4) Significant results of Indicator Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 8 | (Year 0) Significant results of Indicator Analysis for the PERfect filtered data set.
(ITS) Table 9 | (Year 1) Significant results of Indicator Analysis for the PERfect filtered data set.
(ITS) Table 10 | (Year 4) Significant results of Indicator Analysis for the PERfect filtered data set.
(ITS) Table 11 | (Year 0) Significant results of Indicator Analysis for the PIME filtered data set.
(ITS) Table 12 | (Year 1) Significant results of Indicator Analysis for the PIME filtered data set.
(ITS) Table 13 | (Year 4) Significant results of Indicator Analysis for the PIME filtered data set.
(ITS) Table 14 | (FULL data set) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(ITS) Table 15 | (Arbitrary filtered) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(ITS) Table 16 | (PERFect filtered) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(ITS) Table 17 | (PIME filtered) Combined results of IndVal analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
Linear discriminant analysis (LDA) effect size (LEfSe) is a method to support high-dimensional class comparisons with a particular focus on microbial community analyses. LEfSe determines the features (organisms, clades, operational taxonomic units, genes, or functions) most likely to explain differences between classes by coupling standard tests for statistical significance with additional tests encoding biological consistency and effect relevance. Class comparison methods typically predict biomarkers consisting of features that violate a null hypothesis of no difference between classes. The effect size provides an estimation of the magnitude of the observed phenomenon due to each characterizing feature and it is thus a valuable tool for ranking the relevance of different biological aspects and for addressing further investigations and analyses.
First, a little data tidying and subsetting only ASV data from the taxonomy table.
for (i in samp_ps) {
tmp_get <- get(i)
tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)]
tax_table(tmp_get) <- cbind(tax_table(tmp_get),
rownames(tax_table(tmp_get)))
colnames(tax_table(tmp_get)) <- c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "Species")
### ASV Only
tmp_asv <- tmp_get
tax_table(tmp_asv) <- tax_table(tmp_asv)[,c(7)]
tmp_name_asv <- purrr::map_chr(i, ~ paste0(., "_rf_asv"))
assign(tmp_name_asv, tmp_asv)
### NO ASVs
tax_table(tmp_get) <- tax_table(tmp_get)[,c(1:6)]
tmp_name <- purrr::map_chr(i, ~ paste0(., "_rf_all"))
assign(tmp_name, tmp_get)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = "_rf")lefse_lda <- 2
lefse_kw <- 0.05
lefse_wc <- 0.05for (i in samp_ps) {
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_asv")))
tmp_lefse <- run_lefse(ps = tmp_get, norm = "CPM", group = "TREAT", strict = "2",
lda_cutoff = lefse_lda,
kw_cutoff = lefse_kw,
wilcoxon_cutoff = lefse_wc,
bootstrap_n = 30, bootstrap_fraction = 2/3,
sample_min = 10, multigrp_strat = FALSE, curv = FALSE)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv"))
assign(tmp_name, tmp_lefse)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = paste("its_ps.*_lefse_asv$"))
for (i in samp_ps) {
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_rf_all")))
tmp_lefse <- run_lefse(ps = tmp_get, norm = "CPM", group = "TREAT", strict = "2",
lda_cutoff = lefse_lda,
kw_cutoff = lefse_kw,
wilcoxon_cutoff = lefse_wc,
bootstrap_n = 30, bootstrap_fraction = 2/3,
sample_min = 10, multigrp_strat = FALSE, curv = FALSE)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_all"))
assign(tmp_name, tmp_lefse)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = paste("its_ps.*_lefse_all$"))for (i in samp_ps) {
tmp_o_tax <- data.frame(tax_table(get(i)))
tmp_o_tax$ASV_ID <- NULL
tmp_o_tax <- tmp_o_tax %>% tibble::rownames_to_column("ASV_ID")
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_get <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv")))
tmp_mt <- data.frame(marker_table(tmp_get))
tmp_mt$feature <- gsub('s__', '', tmp_mt$feature)
tmp_mt <- tmp_mt %>% dplyr::rename(c("ASV_ID" = 1, "group" = 2, "pval" = 4)) %>%
tibble::remove_rownames()
tmp_sum <- dplyr::left_join(tmp_mt, tmp_o_tax, by = "ASV_ID")
tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^", paste(tmp_year, "_", sep = ""))
#tmp_sum$group <- stringr::str_replace(tmp_sum$group, "^3$", "W3")
tmp_sum <- tmp_sum[order(as.numeric(gsub("[A-Z]{3}", "", tmp_sum$ASV_ID))),]
tmp_res_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_summary"))
assign(tmp_res_name, tmp_sum)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = paste("its_ps.*_lefse_asv_summary"))Now we can save a few files and display the data.
for (i in samp_ps) {
tmp_year <- stringr::str_extract(i, "Y[0-9]")
tmp_ctl <- paste(tmp_year, "CTL", sep = "_")
tmp_n <- paste(tmp_year, "N", sep = "_")
tmp_p <- paste(tmp_year, "P", sep = "_")
tmp_np <- paste(tmp_year, "NP", sep = "_")
tmp_get <- get(i)
tmp_df <- data.frame(t(otu_table(tmp_get)))
#tmp_get[,1] <- NULL
#tmp_df <- as.data.frame(t(tmp_otu))
tmp_col_names <- colnames(tmp_df)
tmp_col_names <- tmp_col_names %>%
stringr::str_replace("[A-Z]{2}_S[0-9]{2}_T[0-9]{3}_", "") %>%
stringr::str_replace("_[A-Z]{3}_", "_")
colnames(tmp_df) <- tmp_col_names
tmp_df$freq <- apply(tmp_df > 0, 1, sum)
tmp_df$freq_CTL <- apply(tmp_df[ , (names(tmp_df) %in% tmp_ctl)] > 0, 1, sum)
tmp_df$freq_N <- apply(tmp_df[ , (names(tmp_df) %in% tmp_n)] > 0, 1, sum)
tmp_df$freq_P <- apply(tmp_df[ , (names(tmp_df) %in% tmp_p)] > 0, 1, sum)
tmp_df$freq_NP <- apply(tmp_df[ , (names(tmp_df) %in% tmp_np)] > 0, 1, sum)
tmp_df$reads_total <- base::rowSums(tmp_df[ , (names(tmp_df) %in%
c(tmp_ctl, tmp_n, tmp_p, tmp_np))])
tmp_df$reads_CTL <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_ctl)])
tmp_df$reads_N <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_n)])
tmp_df$reads_P <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_p)])
tmp_df$reads_NP <- base::rowSums(tmp_df[ , (names(tmp_df) %in% tmp_np)])
tmp_df <- tmp_df[,!grepl("^[Y]", names(tmp_df))]
tmp_df <- tmp_df %>% tibble::rownames_to_column("ASV_ID")
tmp_get_lefse <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv_summary")))
tmp_merge_df <- merge(tmp_df, tmp_get_lefse, by = "ASV_ID", all = FALSE)
tmp_merge_df$padj <- NULL # remove since value unchanged
tmp_merge_df <- tmp_merge_df[,c(1,12:14,2:11,15:21)]
tmp_merge_df <- tmp_merge_df[order(tmp_merge_df$reads_total, decreasing = TRUE), ]
tmp_merge_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_final"))
assign(tmp_merge_name, tmp_merge_df)
rm(list = ls(pattern = "_lefse_final"))
}
objects(pattern = paste("its_ps.*_lefse_asv_final"))Save a new phyloseq object
for (i in samp_ps) {
tmp_get <- get(i)
tmp_tab <- get(purrr::map_chr(i, ~ paste0(., "_lefse_asv_summary")))
tmp_list <- tmp_tab[,1]
tmp_ps <- prune_taxa(tmp_list, tmp_get)
tmp_name <- purrr::map_chr(i, ~ paste0(., "_lefse_asv_ps"))
assign(tmp_name, tmp_ps)
tmp_ps <- prune_samples(sample_sums(tmp_ps) > 0, tmp_ps)
tmp_ps <- merge_phyloseq(tmp_ps, sample_data)
assign(tmp_name, tmp_ps)
rm(list = ls(pattern = "tmp_"))
}
objects(pattern = paste("its_ps.*_lefse_asv_ps$"))(ITS) Table 18 | Summary results of LEfSe Analysis for each data set.
(ITS) Table 19 | (Year 0) Significant results of the LEfSe ASV Analysis for the FULL data set.
(ITS) Table 20 | (Year 1) Significant results of the LEfSe ASV Analysis for the FULL data set.
(ITS) Table 21 | (Year 4) Significant results of the LEfSe ASV Analysis for the FULL data set.
(ITS) Table 22 | (Year 0) Significant results of the LEfSe ASV Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 23 | (Year 1) Significant results of the LEfSe ASV Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 24 | (Year 4) Significant results of the LEfSe ASV Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 25 | (Year 0) Significant results of the LEfSe ASV Analysis for the PERfect filtered data set.
(ITS) Table 26 | (Year 1) Significant results of the LEfSe ASV Analysis for the PERfect filtered data set.
(ITS) Table 27 | (Year 4) Significant results of the LEfSe ASV Analysis for the PERfect filtered data set.
(ITS) Table 28 | (Year 0) Significant results of the LEfSe ASV Analysis for the PIME filtered data set.
(ITS) Table 29 | (Year 1) Significant results of the LEfSe ASV Analysis for the PIME filtered data set.
(ITS) Table 30 | (Year 4) Significant results of the LEfSe ASV Analysis for the PIME filtered data set.
(ITS) Table 31 | (FULL data set) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(ITS) Table 32 | (Arbitrary filtered) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(ITS) Table 33 | (PERFect filtered) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(ITS) Table 34 | (PIME filtered) Combined results of LEfSe analysis for each year showing which year and treatment an ASV was enriched in. Please note that NA for total reads and frequency DOES NOT indicate the ASV was absent for a given year, just that it was not significant.
(ITS) Table 35 | (Year 0) Significant results of the LEfSe Marker Analysis for the FULL data set.
(ITS) Table 36 | (Year 1) Significant results of the LEfSe Marker Analysis for the FULL data set.
(ITS) Table 37 | (Year 4) Significant results of the LEfSe Marker Analysis for the FULL data set.
(ITS) Table 38 | (Year 0) Significant results of the LEfSe Marker Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 39 | (Year 1) Significant results of the LEfSe Marker Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 40 | (Year 4) Significant results of the LEfSe Marker Analysis for the Arbitrary filtered ASV data set.
(ITS) Table 41 | (Year 0) Significant results of the LEfSe Marker Analysis for the PERfect filtered data set.
(ITS) Table 42 | (Year 1) Significant results of the LEfSe Marker Analysis for the PERfect filtered data set.
(ITS) Table 43 | (Year 4) Significant results of the LEfSe Marker Analysis for the PERfect filtered data set.
(ITS) Table 44 | (Year 0) Significant results of the LEfSe Marker Analysis for the PIME filtered data set.
(ITS) Table 45 | (Year 1) Significant results of the LEfSe Marker Analysis for the PIME filtered data set.
(ITS) Table 46 | (Year 4) Significant results of the LEfSe Marker Analysis for the PIME filtered data set.
The source code for this page can be accessed on GitHub by clicking this link.
Data generated in this workflow and the Rdata need to run the workflow can be accessed on figshare at 10.25573/data.16828249.
If you see mistakes or want to suggest changes, please create an issue on the source repository.
Text and figures are licensed under Creative Commons Attribution CC BY 4.0. Source code is available at https://github.com/agua-salud/nutrients/, unless otherwise noted. The figures that have been reused from other sources don't fall under this license and can be recognized by a note in their caption: "Figure from ...".